-- Databricks notebook source
-- MAGIC %md
-- MAGIC # Delta Lake & Databricks Comprehensive Demo
-- MAGIC
-- MAGIC ## Topics Covered:
-- MAGIC 1. Managed vs External Tables
-- MAGIC 2. CTAS (CREATE TABLE AS SELECT)
-- MAGIC 3. DML Operations (INSERT, UPDATE, DELETE, MERGE)
-- MAGIC 4. Delta Table History
-- MAGIC 5. Time Travel (Version & Timestamp)
-- MAGIC 6. RESTORE Table
-- MAGIC 7. Shallow Clone vs Deep Clone
-- MAGIC 8. Z-Order Indexing & OPTIMIZE
-- MAGIC 9. Views (Temporary, Global Temporary, Stored/Permanent)
-- MAGIC 10. VACUUM

-- COMMAND ----------

-- List the external locations that already exist.
-- NOTE(review): presumably run to confirm that a location covers the LOCATION path
-- used by the external table created later in this notebook - confirm.
SHOW EXTERNAL LOCATIONS;

-- COMMAND ----------

-- Get managed table location
-- NOTE(review): this cell sits before the setup cell that selects main.delta_demo and
-- before the cells that create both tables, so it fails on a fresh top-to-bottom run.
-- The same two statements are repeated later, after the tables have been created.
DESCRIBE DETAIL employees_managed;

-- Get external table location
DESCRIBE DETAIL employees_external;

-- COMMAND ----------



-- COMMAND ----------

-- MAGIC %md
-- MAGIC ## Setup: Create Catalog and Schema

-- COMMAND ----------

-- DBTITLE 1,Cell 4
-- Demo bootstrap: make sure the catalog and schema exist, then select them so
-- every unqualified table name below resolves to main.delta_demo.
CREATE CATALOG IF NOT EXISTS main;
USE CATALOG main;

CREATE SCHEMA IF NOT EXISTS delta_demo;
USE SCHEMA delta_demo;

-- Confirm the active catalog and schema
SELECT
    current_catalog(),
    current_schema();

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 1: Managed vs External Tables
-- MAGIC ---

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 1.1 Managed Table
-- MAGIC - Data stored in metastore's managed storage
-- MAGIC - DROP TABLE deletes both metadata AND data

-- COMMAND ----------

-- Create a MANAGED table (no LOCATION specified).
-- On a freshly created table this statement is Delta version 0 (an empty table);
-- the INSERT below is version 1.
CREATE OR REPLACE TABLE employees_managed (
    emp_id INT,
    emp_name STRING,
    department STRING,
    salary DECIMAL(10,2),
    hire_date DATE,
    is_active BOOLEAN
)
USING DELTA
COMMENT 'Managed employee table - Delta Lake manages the data files';

-- Insert sample data.
-- Columns are listed explicitly and hire_date uses typed DATE literals, so the
-- statement does not depend on column order or on implicit STRING -> DATE casts.
INSERT INTO employees_managed
    (emp_id, emp_name, department, salary, hire_date, is_active)
VALUES
    (1, 'Rahul Sharma', 'Engineering', 75000.00, DATE '2022-01-15', true),
    (2, 'Priya Patel', 'Engineering', 82000.00, DATE '2021-06-20', true),
    (3, 'Amit Kumar', 'Sales', 65000.00, DATE '2023-03-10', true),
    (4, 'Sneha Reddy', 'Marketing', 70000.00, DATE '2022-08-05', true),
    (5, 'Vikram Singh', 'Engineering', 90000.00, DATE '2020-11-12', true),
    (6, 'Anjali Gupta', 'HR', 55000.00, DATE '2023-01-25', true),
    (7, 'Ravi Verma', 'Sales', 72000.00, DATE '2021-09-30', true),
    (8, 'Deepika Nair', 'Marketing', 68000.00, DATE '2022-04-18', true),
    (9, 'Karthik Iyer', 'Engineering', 85000.00, DATE '2021-02-14', true),
    (10, 'Meera Joshi', 'HR', 58000.00, DATE '2023-07-01', true);

-- COMMAND ----------

-- Show the rows inserted above
SELECT * FROM employees_managed;

-- COMMAND ----------

-- Verify: Check if table is MANAGED
DESCRIBE EXTENDED employees_managed;

-- Look for the "Type" row in the output - it should show "MANAGED"

-- COMMAND ----------

-- Verify: Check table location (reported in the DESCRIBE DETAIL output)
DESCRIBE DETAIL employees_managed;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 1.2 External Table
-- MAGIC - Data stored in user-specified location
-- MAGIC - DROP TABLE deletes only metadata, data files remain

-- COMMAND ----------

-- Check existing storage credentials.
-- NOTE(review): presumably one of these must grant access to the abfss:// path used
-- as LOCATION for the external table below - confirm.
SHOW STORAGE CREDENTIALS;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC

-- COMMAND ----------

-- DBTITLE 1,Untitled
-- Setup (repeated so this cell can be run on its own)
USE CATALOG main;
CREATE SCHEMA IF NOT EXISTS delta_demo;
USE SCHEMA delta_demo;

-- Create EXTERNAL table: the LOCATION clause is what makes it external
CREATE OR REPLACE TABLE employees_external (
    emp_id INT,
    emp_name STRING,
    department STRING,
    salary DECIMAL(10,2),
    hire_date DATE,
    is_active BOOLEAN
)
USING DELTA
LOCATION 'abfss://external-tables-demo@tastybytesstgacc.dfs.core.windows.net/employees_external'
COMMENT 'External employee table - We manage the data location';

-- Insert data.
-- Columns are listed explicitly and hire_date uses typed DATE literals, so the
-- statement does not depend on column order or on implicit STRING -> DATE casts.
INSERT INTO employees_external
    (emp_id, emp_name, department, salary, hire_date, is_active)
VALUES
    (1, 'Rahul Sharma', 'Engineering', 75000.00, DATE '2022-01-15', true),
    (2, 'Priya Patel', 'Engineering', 82000.00, DATE '2021-06-20', true),
    (3, 'Amit Kumar', 'Sales', 65000.00, DATE '2023-03-10', true),
    (4, 'Sneha Reddy', 'Marketing', 70000.00, DATE '2022-08-05', true),
    (5, 'Vikram Singh', 'Engineering', 90000.00, DATE '2020-11-12', true);

-- Verify data
SELECT * FROM employees_external;



-- COMMAND ----------

-- Verify: Check if table is EXTERNAL
DESCRIBE EXTENDED employees_external;

-- The "Type" row of the output should read "EXTERNAL"

-- COMMAND ----------

-- Verify: row counts of both tables side by side
SELECT
    'MANAGED' AS table_type,
    COUNT(*) AS row_count
FROM employees_managed
UNION ALL
SELECT
    'EXTERNAL' AS table_type,
    COUNT(*) AS row_count
FROM employees_external;

-- COMMAND ----------

-- Storage location of the managed table
DESCRIBE DETAIL employees_managed;



-- COMMAND ----------

-- Storage location of the external table
DESCRIBE DETAIL employees_external;

-- COMMAND ----------

-- Tables currently registered in the demo schema
SHOW TABLES IN delta_demo;

-- COMMAND ----------

-- Drop managed table
DROP TABLE employees_managed;

-- Drop external table (the files at its LOCATION are expected to remain)
DROP TABLE employees_external;

-- Bring the managed table back: every later section (CTAS, DML, history,
-- time travel, RESTORE, clones, views, VACUUM) reads from employees_managed
-- and would fail with a table-not-found error if it stayed dropped.
-- UNDROP requires a Unity Catalog table and must run within the retention window.
UNDROP TABLE employees_managed;

-- COMMAND ----------

-- List files in external table location.
-- The external table was dropped in the previous cell; files still listed here
-- show that DROP removed only the metadata.
LIST 'abfss://external-tables-demo@tastybytesstgacc.dfs.core.windows.net/employees_external';

-- COMMAND ----------

-- List files in managed table location.
-- NOTE(review): this path embeds metastore/table IDs from the author's workspace;
-- replace it with the location reported by DESCRIBE DETAIL employees_managed.
LIST 'abfss://meta-store@tastybytesstgacc.dfs.core.windows.net/0f377683-cb5d-436c-8fee-573d30bf513d/tables/aa64c9bb-d5ed-4bcf-aa0e-6bbb08c708aa'; -- Ensure storage account key is configured correctly in cluster settings.

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 2: CTAS (CREATE TABLE AS SELECT)
-- MAGIC ---

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 2.1 Simple CTAS

-- COMMAND ----------

-- CTAS: the new table's schema and rows both come from the query
CREATE OR REPLACE TABLE engineering_team AS
SELECT
    e.emp_id,
    e.emp_name,
    e.salary,
    e.hire_date
FROM employees_managed AS e
WHERE e.department = 'Engineering';

-- COMMAND ----------

-- Verify CTAS result
SELECT * FROM engineering_team;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 2.2 CTAS with Transformations

-- COMMAND ----------

-- CTAS from an aggregate query: one row per department, active employees only
CREATE OR REPLACE TABLE employee_summary AS
SELECT
    e.department,
    COUNT(*)                 AS employee_count,
    ROUND(AVG(e.salary), 2)  AS avg_salary,
    MIN(e.salary)            AS min_salary,
    MAX(e.salary)            AS max_salary,
    MIN(e.hire_date)         AS earliest_hire,
    MAX(e.hire_date)         AS latest_hire
FROM employees_managed AS e
WHERE e.is_active = true
GROUP BY e.department;

-- COMMAND ----------

-- Verify: highest-paying departments first
SELECT *
FROM employee_summary
ORDER BY avg_salary DESC;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 2.3 CTAS with Table Properties

-- COMMAND ----------

-- CTAS combined with partitioning, a table comment and table properties.
-- The column list below is the full employees_managed schema in table order.
CREATE OR REPLACE TABLE high_earners
USING DELTA
PARTITIONED BY (department)
COMMENT 'Employees earning above 70000'
TBLPROPERTIES (
    'quality' = 'silver',
    'delta.autoOptimize.optimizeWrite' = 'true'
)
AS
SELECT
    emp_id,
    emp_name,
    department,
    salary,
    hire_date,
    is_active
FROM employees_managed
WHERE salary > 70000;

-- COMMAND ----------

-- Verify table properties, comment and partitioning
DESCRIBE EXTENDED high_earners;

-- COMMAND ----------

-- Verify data, top salary first
SELECT *
FROM high_earners
ORDER BY salary DESC;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 3: DML Operations & Version Creation
-- MAGIC ---

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 3.1 Check Initial State (Version 0 = CREATE TABLE, Version 1 = initial INSERT)

-- COMMAND ----------

-- Commit log before any DML in this section
DESCRIBE HISTORY employees_managed;

-- COMMAND ----------

SELECT * FROM employees_managed;

-- COMMAND ----------

-- Baseline numbers to compare against after the DML below
SELECT
    'Initial State' AS state,
    COUNT(*)        AS row_count,
    SUM(salary)     AS total_salary
FROM employees_managed;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 3.2 UPDATE Operation (Creates Version 2)

-- COMMAND ----------

-- 10% raise for everyone in Engineering
UPDATE employees_managed SET salary = salary * 1.10 WHERE department = 'Engineering';

-- COMMAND ----------

-- Verify UPDATE: Engineering salaries, highest first
SELECT
    emp_name,
    department,
    salary
FROM employees_managed
WHERE department = 'Engineering'
ORDER BY salary DESC;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 3.3 DELETE Operation (the UPDATE and DELETE below create Versions 3 and 4)

-- COMMAND ----------

-- Flag one employee as inactive so the DELETE below has something to remove
UPDATE employees_managed SET is_active = false WHERE emp_id = 10;

-- Remove every inactive employee
DELETE FROM employees_managed WHERE is_active = false;

-- COMMAND ----------

-- Verify DELETE
SELECT COUNT(*) AS remaining_employees
FROM employees_managed;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 3.4 INSERT Operation (Creates Version 5)

-- COMMAND ----------

-- Add new employees.
-- Columns are listed explicitly and hire_date uses typed DATE literals, so the
-- statement does not depend on column order or on implicit STRING -> DATE casts.
INSERT INTO employees_managed
    (emp_id, emp_name, department, salary, hire_date, is_active)
VALUES
    (11, 'Arjun Menon', 'Engineering', 78000.00, DATE '2024-01-10', true),
    (12, 'Kavitha Rao', 'Sales', 67000.00, DATE '2024-02-15', true),
    (13, 'Suresh Pillai', 'Marketing', 71000.00, DATE '2024-03-20', true);

-- COMMAND ----------

-- Verify INSERT
SELECT * FROM employees_managed WHERE emp_id >= 11;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 3.5 MERGE Operation (Upsert) - Creates Version 6

-- COMMAND ----------

-- Create source data for merge.
-- hire_date uses typed DATE literals so the source column is a DATE, matching the
-- target column, instead of a STRING that the MERGE would have to cast implicitly.
CREATE OR REPLACE TEMP VIEW employee_updates AS
SELECT * FROM VALUES
    (1, 'Rahul Sharma', 'Engineering', 85000.00, DATE '2022-01-15', true),    -- Update existing
    (14, 'Lakshmi Krishnan', 'HR', 62000.00, DATE '2024-04-01', true),        -- Insert new
    (15, 'Mohan Das', 'Engineering', 92000.00, DATE '2024-04-15', true)       -- Insert new
AS t(emp_id, emp_name, department, salary, hire_date, is_active);

-- Perform MERGE (Upsert), matching rows on emp_id.
-- Matched rows only get salary and department overwritten; unmatched source rows
-- are inserted in full.
MERGE INTO employees_managed AS target
USING employee_updates AS source
ON target.emp_id = source.emp_id
WHEN MATCHED THEN
    UPDATE SET 
        salary = source.salary,
        department = source.department
WHEN NOT MATCHED THEN
    INSERT (emp_id, emp_name, department, salary, hire_date, is_active)
    VALUES (source.emp_id, source.emp_name, source.department, source.salary, source.hire_date, source.is_active);

-- COMMAND ----------

-- Verify MERGE
SELECT * FROM employees_managed ORDER BY emp_id;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 4: Delta Table History
-- MAGIC ---

-- COMMAND ----------

-- Full commit log, every column
DESCRIBE HISTORY employees_managed;

-- COMMAND ----------

-- Only the columns that explain what each commit did, oldest commit first
SELECT version, timestamp, operation, operationParameters, operationMetrics
FROM (DESCRIBE HISTORY employees_managed)
ORDER BY version;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 5: Time Travel
-- MAGIC ---

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 5.1 Time Travel by VERSION

-- COMMAND ----------

-- Version map on a freshly created table (confirm with DESCRIBE HISTORY):
--   0 = CREATE OR REPLACE TABLE (empty), 1 = initial INSERT, 2 = 10% Engineering raise.
-- Version 0 therefore has no rows; the original data is version 1.
SELECT * FROM employees_managed VERSION AS OF 1;

-- COMMAND ----------

-- Query original data (Version 1 - after initial INSERT)
SELECT 'Version 1 (Original)' as version_info, COUNT(*) as count, ROUND(SUM(salary),2) as total_salary
FROM employees_managed VERSION AS OF 1;

-- COMMAND ----------

SELECT * FROM employees_managed VERSION AS OF 2;

-- COMMAND ----------

-- Query after UPDATE (Version 2 - after the Engineering raise)
SELECT 'Version 2 (After Raise)' as version_info, COUNT(*) as count, ROUND(SUM(salary),2) as total_salary
FROM employees_managed VERSION AS OF 2;

-- COMMAND ----------

-- Salary of employee 1 at version 1 next to the current salary
SELECT 'Version 1' AS version, emp_name, salary
FROM employees_managed VERSION AS OF 1
WHERE emp_id = 1

UNION ALL

SELECT 'Current' AS version, emp_name, salary
FROM employees_managed
WHERE emp_id = 1;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 5.2 Time Travel by TIMESTAMP

-- COMMAND ----------

-- First, get the commit timestamps from history (oldest commit first)
SELECT version, timestamp, operation 
FROM (DESCRIBE HISTORY employees_managed)
ORDER BY version;

-- COMMAND ----------

-- Query using timestamp (replace with actual timestamp from above)
-- Example: SELECT * FROM employees_managed TIMESTAMP AS OF '2024-01-15T10:30:00Z';

-- Using relative time (10 minutes ago)
-- NOTE(review): presumably errors when the table's first commit is less than
-- 10 minutes old, because no version exists at that time - confirm.
SELECT COUNT(*) as count_10_min_ago
FROM employees_managed TIMESTAMP AS OF (current_timestamp() - INTERVAL 10 MINUTES);

-- COMMAND ----------

-- Re-check the commit timestamps before picking a literal for the next cell
DESCRIBE HISTORY employees_managed;

-- COMMAND ----------

-- Use a timestamp AT or BEFORE the latest commit
-- NOTE(review): the literal below looks like a value copied from the author's own
-- run; replace it with a timestamp taken from the DESCRIBE HISTORY output above.
SELECT COUNT(*) as count_at_time
FROM employees_managed TIMESTAMP AS OF '2026-01-27T11:45:15.000+00:00';

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 5.3 Compare Versions (Audit Query)

-- COMMAND ----------

-- Audit: diff the table as of version 1 against its current contents.
-- Rows only in the snapshot are DELETED, rows only in the current table are NEW,
-- rows in both with a different salary are UPDATED; unchanged rows are filtered out.
WITH snapshot_v1 AS (
    SELECT
        emp_id,
        emp_name,
        salary AS old_salary
    FROM employees_managed VERSION AS OF 1
),
latest AS (
    SELECT
        emp_id,
        emp_name,
        salary AS new_salary
    FROM employees_managed
)
SELECT
    COALESCE(snapshot_v1.emp_id, latest.emp_id)     AS emp_id,
    COALESCE(snapshot_v1.emp_name, latest.emp_name) AS emp_name,
    snapshot_v1.old_salary,
    latest.new_salary,
    CASE
        WHEN snapshot_v1.emp_id IS NULL THEN 'NEW'
        WHEN latest.emp_id IS NULL THEN 'DELETED'
        WHEN snapshot_v1.old_salary <> latest.new_salary THEN 'UPDATED'
        ELSE 'UNCHANGED'
    END AS change_type
FROM snapshot_v1
FULL OUTER JOIN latest
    ON snapshot_v1.emp_id = latest.emp_id
WHERE snapshot_v1.emp_id IS NULL
   OR latest.emp_id IS NULL
   OR snapshot_v1.old_salary <> latest.new_salary
ORDER BY emp_id;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 6: RESTORE Table
-- MAGIC ---

-- COMMAND ----------

-- Row count and salary total before the restore, for comparison afterwards
SELECT
    'Before Restore'      AS state,
    COUNT(*)              AS count,
    ROUND(SUM(salary),2)  AS total
FROM employees_managed;

-- COMMAND ----------

-- Latest version number in the commit log
SELECT MAX(version) AS current_version
FROM (DESCRIBE HISTORY employees_managed);

-- COMMAND ----------

-- Restore to Version 1 (original state: the 10 rows from the initial INSERT).
-- On a freshly created table version 0 is the empty CREATE and version 6 is the
-- MERGE that was just run, so restoring to 6 would not roll anything back.
-- Confirm the version numbers against DESCRIBE HISTORY before running.
RESTORE TABLE employees_managed TO VERSION AS OF 1;

-- COMMAND ----------

-- Verify restore: count and total should match the original data
SELECT 'After Restore to V1' as state, COUNT(*) as count, ROUND(SUM(salary),2) as total 
FROM employees_managed;

-- COMMAND ----------

-- RESTORE does not rewind the log: it is recorded as a new version on top
DESCRIBE HISTORY employees_managed;

-- COMMAND ----------

SELECT * FROM employees_managed;

-- COMMAND ----------

-- To undo the restore, restore again to the version that was current before it.
-- Replace X with that version number from the history output above:
-- RESTORE TABLE employees_managed TO VERSION AS OF X;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 7: Shallow Clone vs Deep Clone
-- MAGIC ---

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 7.1 SHALLOW CLONE
-- MAGIC - Only copies metadata
-- MAGIC - References original data files
-- MAGIC - Fast and space-efficient
-- MAGIC - Good for: testing, experimentation

-- COMMAND ----------

-- Shallow clone of the table's current state
CREATE OR REPLACE TABLE employees_shallow_clone SHALLOW CLONE employees_managed;

-- COMMAND ----------

-- Verify shallow clone: both row counts should match
SELECT
    'Original' AS source,
    COUNT(*)   AS count
FROM employees_managed
UNION ALL
SELECT
    'Shallow Clone' AS source,
    COUNT(*)        AS count
FROM employees_shallow_clone;

-- COMMAND ----------

-- Check clone details
DESCRIBE EXTENDED employees_shallow_clone;

-- COMMAND ----------

-- Shallow clone pinned to version 2 of the source table
CREATE OR REPLACE TABLE employees_shallow_clone_v2 SHALLOW CLONE employees_managed VERSION AS OF 2;

-- Verify
SELECT COUNT(*) AS clone_v2_count
FROM employees_shallow_clone_v2;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 7.2 DEEP CLONE
-- MAGIC - Copies metadata AND data files
-- MAGIC - Completely independent copy
-- MAGIC - Takes more time and storage
-- MAGIC - Good for: production copies, backups, migration

-- COMMAND ----------

-- Deep clone of the table's current state
CREATE OR REPLACE TABLE employees_deep_clone DEEP CLONE employees_managed;

-- COMMAND ----------

-- Verify deep clone: both row counts should match
SELECT
    'Original' AS source,
    COUNT(*)   AS count
FROM employees_managed
UNION ALL
SELECT
    'Deep Clone' AS source,
    COUNT(*)     AS count
FROM employees_deep_clone;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 7.3 Test Clone Independence

-- COMMAND ----------

-- Change employee 1 in each clone by a different amount
UPDATE employees_shallow_clone
SET salary = salary + 5000
WHERE emp_id = 1;

UPDATE employees_deep_clone
SET salary = salary + 10000
WHERE emp_id = 1;

-- COMMAND ----------

-- Verify changes are independent: three different salaries are expected
SELECT 'Original' AS source, salary
FROM employees_managed
WHERE emp_id = 1
UNION ALL
SELECT 'Shallow Clone' AS source, salary
FROM employees_shallow_clone
WHERE emp_id = 1
UNION ALL
SELECT 'Deep Clone' AS source, salary
FROM employees_deep_clone
WHERE emp_id = 1;

-- COMMAND ----------

-- Table details of the source and of each clone, for side-by-side comparison
DESCRIBE DETAIL employees_managed;

-- COMMAND ----------

DESCRIBE DETAIL employees_shallow_clone;

-- COMMAND ----------

DESCRIBE DETAIL employees_deep_clone;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 8: Z-ORDER Indexing & OPTIMIZE
-- MAGIC ---

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 8.1 Create Table with More Data for Optimization Demo

-- COMMAND ----------

-- Create a larger table for optimization demo
CREATE OR REPLACE TABLE sales_transactions (
    transaction_id BIGINT,
    product_id INT,
    customer_id INT,
    region STRING,
    sale_date DATE,
    quantity INT,
    unit_price DECIMAL(10,2),
    total_amount DECIMAL(12,2)
)
USING DELTA;

-- COMMAND ----------

-- Insert sample data (10,000 synthetic transactions derived from RANGE ids).
-- unit_price is generated once per row in the inner query and total_amount is
-- derived from it, so total_amount = unit_price * quantity holds for every row
-- (two independent RAND() calls would produce unrelated prices and totals).
INSERT INTO sales_transactions
    (transaction_id, product_id, customer_id, region, sale_date, quantity, unit_price, total_amount)
SELECT
    priced.transaction_id,
    priced.product_id,
    priced.customer_id,
    priced.region,
    priced.sale_date,
    priced.quantity,
    priced.unit_price,
    CAST(priced.unit_price * priced.quantity AS DECIMAL(12,2)) AS total_amount
FROM (
    SELECT
        id AS transaction_id,
        (id % 100) + 1 AS product_id,        -- 100 distinct products
        (id % 1000) + 1 AS customer_id,      -- 1000 distinct customers
        CASE (id % 4)
            WHEN 0 THEN 'North'
            WHEN 1 THEN 'South'
            WHEN 2 THEN 'East'
            ELSE 'West'
        END AS region,
        date_add(DATE '2023-01-01', CAST(id % 365 AS INT)) AS sale_date,  -- spread over 2023
        (id % 10) + 1 AS quantity,           -- 1 to 10 units
        CAST(ROUND(RAND() * 100 + 10, 2) AS DECIMAL(10,2)) AS unit_price  -- 10.00 to 110.00
    FROM RANGE(10000)
) AS priced;

-- COMMAND ----------

-- Verify data
SELECT COUNT(*) as total_rows, 
       COUNT(DISTINCT product_id) as unique_products,
       COUNT(DISTINCT customer_id) as unique_customers,
       COUNT(DISTINCT region) as unique_regions
FROM sales_transactions;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 8.2 Check Table Statistics Before Optimization

-- COMMAND ----------

-- Check file statistics before OPTIMIZE; keep the output to compare against the
-- DESCRIBE DETAIL that is run again after OPTIMIZE
DESCRIBE DETAIL sales_transactions;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 8.3 OPTIMIZE Command

-- COMMAND ----------

-- Optimize the table (compacts small files)
OPTIMIZE sales_transactions;

-- COMMAND ----------

-- Check after OPTIMIZE and compare with the DESCRIBE DETAIL output taken before it
DESCRIBE DETAIL sales_transactions;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 8.4 Z-ORDER Indexing
-- MAGIC - Co-locates related data in same files
-- MAGIC - Improves query performance for filtered columns
-- MAGIC - Best for: high-cardinality columns used in WHERE clauses

-- COMMAND ----------

-- Z-ORDER by the two columns the demo query filters on
OPTIMIZE sales_transactions ZORDER BY (region, product_id);

-- COMMAND ----------

-- Verify Z-ORDER was applied: the five most recent OPTIMIZE commits and their parameters
SELECT
    version,
    operation,
    operationParameters
FROM (DESCRIBE HISTORY sales_transactions)
WHERE operation = 'OPTIMIZE'
ORDER BY version DESC
LIMIT 5;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 8.5 Query Performance Comparison
-- MAGIC (Run these and compare execution times in Spark UI)

-- COMMAND ----------

-- Filters on both Z-ORDER columns: sales per product in the North region
SELECT
    st.region,
    st.product_id,
    SUM(st.total_amount) AS total_sales,
    COUNT(*)             AS transaction_count
FROM sales_transactions AS st
WHERE st.region = 'North'
  AND st.product_id BETWEEN 10 AND 20
GROUP BY
    st.region,
    st.product_id
ORDER BY total_sales DESC;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 9: Views
-- MAGIC ---

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 9.1 Temporary View (Session-Scoped)
-- MAGIC - Exists only in current SparkSession
-- MAGIC - Not persisted to metastore
-- MAGIC - Disappears when session ends

-- COMMAND ----------

-- Create temporary view of employees earning more than 75000
CREATE OR REPLACE TEMP VIEW temp_high_salary_emp AS
SELECT emp_id, emp_name, department, salary
FROM employees_managed
WHERE salary > 75000;

-- COMMAND ----------

-- Query temporary view
SELECT * FROM temp_high_salary_emp ORDER BY salary DESC;

-- COMMAND ----------

-- SHOW TABLES also lists session temp views, whatever schema is named:
-- look for the row with isTemporary = true (it is not stored in delta_demo)
SHOW TABLES IN delta_demo;

-- COMMAND ----------

-- SHOW VIEWS lists it as well
SHOW VIEWS;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 9.2 Global Temporary View (Application-Scoped)
-- MAGIC - Shared across all SparkSessions in same application
-- MAGIC - Stored in global_temp database
-- MAGIC - Survives session restart within same cluster

-- COMMAND ----------

-- Global temporary view: per-department headcount and salary figures
CREATE OR REPLACE GLOBAL TEMP VIEW global_dept_summary AS
SELECT
    e.department,
    COUNT(*)                 AS emp_count,
    ROUND(AVG(e.salary), 2)  AS avg_salary,
    SUM(e.salary)            AS total_salary
FROM employees_managed AS e
GROUP BY e.department;

-- COMMAND ----------

-- Global temp views must be referenced through the global_temp schema
SELECT *
FROM global_temp.global_dept_summary
ORDER BY avg_salary DESC;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 9.3 Stored/Permanent View (Persisted)
-- MAGIC - Persisted in metastore
-- MAGIC - Available across sessions and clusters
-- MAGIC - Definition stored, not data

-- COMMAND ----------

-- Create permanent/stored view over active employees.
-- Adds tenure in days and in years (days / 365.25, one decimal) and a salary band:
-- 'Senior' at 80000 and above, 'Mid-Level' at 65000 and above, otherwise 'Junior'.
-- Tenure is computed from current_date() each time the view is queried.
CREATE OR REPLACE VIEW v_employee_details AS
SELECT 
    e.emp_id,
    e.emp_name,
    e.department,
    e.salary,
    e.hire_date,
    DATEDIFF(current_date(), e.hire_date) as days_employed,
    ROUND(DATEDIFF(current_date(), e.hire_date) / 365.25, 1) as years_employed,
    CASE 
        WHEN e.salary >= 80000 THEN 'Senior'
        WHEN e.salary >= 65000 THEN 'Mid-Level'
        ELSE 'Junior'
    END as salary_band
FROM employees_managed e
WHERE e.is_active = true;

-- COMMAND ----------

-- Query permanent view, longest-serving employees first
SELECT * FROM v_employee_details ORDER BY years_employed DESC;

-- COMMAND ----------

-- Verify permanent view exists in metastore
SHOW VIEWS IN delta_demo;

-- COMMAND ----------

-- See view definition (SHOW CREATE TABLE is used on the view here)
SHOW CREATE TABLE v_employee_details;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 9.4 Views Comparison Summary

-- COMMAND ----------

-- Inline reference table comparing the three view kinds (documentation only; reads no tables)
SELECT
    view_type,
    create_syntax,
    scope,
    persisted,
    access_pattern
FROM VALUES
    ('TEMP VIEW', 'CREATE TEMP VIEW', 'Current Session', 'No', 'temp_view_name'),
    ('GLOBAL TEMP VIEW', 'CREATE GLOBAL TEMP VIEW', 'All Sessions (Same App)', 'global_temp database', 'global_temp.view_name'),
    ('PERMANENT VIEW', 'CREATE VIEW', 'Persistent', 'Yes (Metastore)', 'schema.view_name')
AS t(view_type, create_syntax, scope, persisted, access_pattern);

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 10: VACUUM (Cleanup)
-- MAGIC ---

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 10.1 Understanding VACUUM
-- MAGIC - Removes data files no longer referenced by Delta table
-- MAGIC - Default retention: 7 days (168 hours)
-- MAGIC - **WARNING**: After VACUUM, time travel to before retention period is not possible!

-- COMMAND ----------

-- Check table history first: these are the versions time travel can still reach
-- before any files are vacuumed
DESCRIBE HISTORY employees_managed;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 10.2 VACUUM DRY RUN (Safe Preview)

-- COMMAND ----------

-- Preview what would be deleted (SAFE - doesn't delete anything).
-- 168 hours is the 7-day retention window; only unreferenced files older than
-- that are candidates.
VACUUM employees_managed RETAIN 168 HOURS DRY RUN;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 10.3 VACUUM Execution

-- COMMAND ----------

-- Actually remove old files (use with caution!)
-- VACUUM employees_managed RETAIN 168 HOURS;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ### 10.4 VACUUM with Shorter Retention (Requires Override)
-- MAGIC **WARNING**: This can break time travel. Only for advanced use cases.

-- COMMAND ----------

-- To use retention < 7 days, you must set this property first
-- SET spark.databricks.delta.retentionDurationCheck.enabled = false;

-- Then you can run:
-- VACUUM employees_managed RETAIN 24 HOURS;

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Section 11: Cleanup (Optional)
-- MAGIC ---

-- COMMAND ----------

-- Drop all demo objects (uncomment to run).
-- Temp and global temp views are dropped explicitly because they do not live in
-- delta_demo, so DROP SCHEMA ... CASCADE does not remove them.
/*
DROP VIEW IF EXISTS temp_high_salary_emp;
DROP VIEW IF EXISTS employee_updates;
DROP VIEW IF EXISTS global_temp.global_dept_summary;
DROP VIEW IF EXISTS v_employee_details;
DROP TABLE IF EXISTS employees_shallow_clone;
DROP TABLE IF EXISTS employees_shallow_clone_v2;
DROP TABLE IF EXISTS employees_deep_clone;
DROP TABLE IF EXISTS engineering_team;
DROP TABLE IF EXISTS employee_summary;
DROP TABLE IF EXISTS high_earners;
DROP TABLE IF EXISTS sales_transactions;
DROP TABLE IF EXISTS employees_external;
DROP TABLE IF EXISTS employees_managed;
DROP SCHEMA IF EXISTS delta_demo CASCADE;
*/

-- COMMAND ----------

-- MAGIC %md
-- MAGIC ---
-- MAGIC # Quick Reference Summary
-- MAGIC ---
-- MAGIC
-- MAGIC | Concept | Command | Key Points |
-- MAGIC |---------|---------|------------|
-- MAGIC | Managed Table | `CREATE TABLE t (...)` | No LOCATION, Delta manages data |
-- MAGIC | External Table | `CREATE TABLE t (...) LOCATION '...'` | User manages data location |
-- MAGIC | CTAS | `CREATE TABLE t AS SELECT ...` | Create + populate in one step |
-- MAGIC | History | `DESCRIBE HISTORY t` | View all versions |
-- MAGIC | Time Travel (Version) | `SELECT * FROM t VERSION AS OF n` | Query specific version |
-- MAGIC | Time Travel (Time) | `SELECT * FROM t TIMESTAMP AS OF 'ts'` | Query at specific time |
-- MAGIC | Restore | `RESTORE TABLE t TO VERSION AS OF n` | Rollback table state |
-- MAGIC | Shallow Clone | `CREATE TABLE c SHALLOW CLONE t` | Metadata only, references original files |
-- MAGIC | Deep Clone | `CREATE TABLE c DEEP CLONE t` | Full independent copy |
-- MAGIC | Optimize | `OPTIMIZE t` | Compact small files |
-- MAGIC | Z-Order | `OPTIMIZE t ZORDER BY (cols)` | Co-locate data for faster queries |
-- MAGIC | Temp View | `CREATE TEMP VIEW v AS ...` | Session-scoped |
-- MAGIC | Global Temp View | `CREATE GLOBAL TEMP VIEW v AS ...` | App-scoped, use global_temp.v |
-- MAGIC | Permanent View | `CREATE VIEW v AS ...` | Persisted in metastore |
-- MAGIC | Vacuum | `VACUUM t RETAIN n HOURS` | Remove old files |